import scrapy
from bs4 import BeautifulSoup
from scrapy_redis.spiders import RedisSpider
from ..items import ItblogItem


class MySpider(RedisSpider):
    """Spider that reads start urls from the redis queue (myspider:start_urls)
    and crawls cnblogs.com listing pages, following pagination up to MAX_PAGE."""

    name = "cnblogs"
    redis_key = "myspider:start_urls"

    MAX_PAGE = 100  # last listing page to request

    # Retained for backward compatibility with external code that may read it.
    # No longer used for pagination: responses are handled concurrently (and,
    # with scrapy-redis, by several worker processes), so mutating shared
    # class state could skip or duplicate pages.
    current_page = 1

    def parse(self, response):
        """Parse one listing page.

        Yields one ``ItblogItem`` per post found on the page, then a
        ``scrapy.Request`` for the next page. The page number travels in
        ``response.meta["page"]`` (defaulting to 1 for the first response)
        instead of shared spider state, so concurrent responses cannot
        race on the counter.
        """
        bs = BeautifulSoup(response.text, "lxml")
        sections = bs.select("#post_list article.post-item section.post-item-body")
        for section in sections:
            # Anchor holding both the post title and the detail-page url.
            a = section.select_one("div.post-item-text a.post-item-title")
            if a is None or not a.get("href"):
                # Defensive: skip a malformed entry instead of crashing
                # the whole page on a missing anchor/href.
                continue
            item = ItblogItem()
            item["detail_url"] = a["href"]
            item["title"] = a.text
            yield item

        # Next page number derived from this response's own meta, not self.
        next_page_no = response.meta.get("page", 1) + 1
        if next_page_no <= self.MAX_PAGE:
            next_page = f"https://www.cnblogs.com/sitehome/p/{next_page_no}"
            yield scrapy.Request(next_page, meta={"page": next_page_no})










